﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using HtmlAgilityPack;

namespace BMS.Web.Controllers.Shared
{
	/// <summary>
	/// This is an HTML cleanup utility combining the benefits of the
	/// HtmlAgilityPack to parse raw HTML and the AntiXss library
	/// to remove potentially dangerous user input.
	///
	/// Additionally it uses a list created by Robert Beal to limit
	/// the number of allowed tags and attributes to a sensible level.
	///
	/// The class is a singleton; obtain it through <see cref="Instance"/>.
	/// </summary>
	public sealed class HtmlSanitizerUtility
	{
		private static volatile HtmlSanitizerUtility _instance;

		// Lock target for lazy creation of the singleton. Read-only so the
		// object being locked on can never be swapped out.
		private static readonly object _root = new object();

		private HtmlSanitizerUtility() { }

		/// <summary>
		/// The single shared instance, created on first access using
		/// double-checked locking on a private lock object.
		/// </summary>
		public static HtmlSanitizerUtility Instance
		{
			get
			{
				if (_instance == null)
				{
					lock (_root)
					{
						if (_instance == null)
						{
							_instance = new HtmlSanitizerUtility();
						}
					}
				}

				return _instance;
			}
		}

		/// <summary>
		/// Whitelist: tag name => attribute names allowed on that tag.
		/// Elements whose tag is not a key here are removed from the document.
		/// </summary>
		private static readonly Dictionary<string, string[]> ValidHtmlTags = new Dictionary<string, string[]>
		{
			{"span", new string[]       {"style"}},
			{"label", new string[]      {"style"}},
			{"h1", new string[]         {"style"}},
			{"h2", new string[]         {"style"}},
			{"h3", new string[]         {"style"}},
			{"h4", new string[]         {"style"}},
			{"h5", new string[]         {"style"}},
			{"h6", new string[]         {"style"}},
			{"font", new string[]       {"style", "color", "face", "size"}},
			{"strong", new string[]     {"style"}},
			{"b", new string[]          {"style"}},
			{"i", new string[]          {"style"}},
			{"u", new string[]          {"style"}},
			{"strike", new string[]     {"style"}},
		};

		/// <summary>
		/// Matches a backslash, optionally followed by exactly six hex digits
		/// (the escape form that <see cref="CssDecode"/> reverses).
		/// </summary>
		private static readonly System.Text.RegularExpressions.Regex CssEscapePattern =
			new System.Text.RegularExpressions.Regex(
				@"\\([0-9A-Fa-f]{6})?",
				System.Text.RegularExpressions.RegexOptions.CultureInvariant);

		/// <summary>
		/// Takes raw HTML input and cleans against a whitelist.
		/// Elements that are not whitelisted (and all comments) are removed,
		/// attributes that are not whitelisted for their tag are removed, and
		/// the values of the remaining attributes are encoded: "style" and
		/// "class" with CssEncode, everything else with HtmlAttributeEncode.
		/// </summary>
		/// <param name="source">Html source</param>
		/// <returns>Clean output; an empty string for null or empty input</returns>
		public string SanitizeHtml(string source)
		{
			if (String.IsNullOrEmpty(source))
			{
				return String.Empty;
			}

			HtmlNode root = LoadAndScrub(source);

			// One pass over the tree; the whitelist lookup replaces a full
			// tree walk per whitelisted tag.
			foreach (HtmlNode node in root.DescendantsAndSelf())
			{
				string[] allowedAttributes;
				if (!node.HasAttributes || !ValidHtmlTags.TryGetValue(node.Name, out allowedAttributes))
				{
					continue;
				}

				// Work on a snapshot because attributes are removed while looping.
				foreach (HtmlAttribute attribute in node.Attributes.ToArray())
				{
					if (!allowedAttributes.Contains(attribute.Name))
					{
						attribute.Remove(); // Attribute wasn't in the whitelist
					}
					else if (attribute.Name == "class" || attribute.Name == "style")
					{
						attribute.Value = Microsoft.Security.Application.Encoder.CssEncode(attribute.Value);
					}
					else
					{
						attribute.Value = Microsoft.Security.Application.Encoder.HtmlAttributeEncode(attribute.Value);
					}
				}
			}

			return root.InnerHtml;
		}

		/// <summary>
		/// Reverses the CSS escaping of "style" attribute values: every
		/// backslash followed by six hex digits becomes the character with
		/// that code point, and any other backslash is dropped. Elements not
		/// in the whitelist (and all comments) are removed first; attributes
		/// other than "style" are left untouched.
		/// </summary>
		/// <param name="source">Html source containing escaped style values</param>
		/// <returns>Html with decoded style values; an empty string for null or empty input</returns>
		public string CssDecode(string source)
		{
			if (String.IsNullOrEmpty(source))
			{
				return String.Empty;
			}

			HtmlNode root = LoadAndScrub(source);

			foreach (HtmlNode node in root.DescendantsAndSelf())
			{
				if (!node.HasAttributes || !ValidHtmlTags.ContainsKey(node.Name))
				{
					continue;
				}

				foreach (HtmlAttribute attribute in node.Attributes.ToArray())
				{
					if (attribute.Name == "style" && !String.IsNullOrEmpty(attribute.Value))
					{
						attribute.Value = DecodeCssEscapes(attribute.Value);
					}
				}
			}

			return root.InnerHtml;
		}

		/// <summary>
		/// Takes a raw source and removes all HTML tags
		/// </summary>
		/// <param name="source">Html source</param>
		/// <returns>
		/// The concatenated inner text of the sanitized document's top-level
		/// nodes; an empty string when nothing is left after sanitizing
		/// </returns>
		public string StripHtml(string source)
		{
			source = SanitizeHtml(source);

			// No need to continue if we have no clean Html
			if (String.IsNullOrEmpty(source))
			{
				return String.Empty;
			}

			HtmlDocument html = GetHtml(source);
			StringBuilder result = new StringBuilder();

			// For each node, extract only the innerText
			foreach (HtmlNode node in html.DocumentNode.ChildNodes)
			{
				result.Append(node.InnerText);
			}

			return result.ToString();
		}

		/// <summary>
		/// Parses the source and removes everything that is not whitelisted.
		/// </summary>
		/// <returns>The document node of the scrubbed document</returns>
		private static HtmlNode LoadAndScrub(string source)
		{
			HtmlNode root = GetHtml(source).DocumentNode;
			CleanNodes(root, ValidHtmlTags.Keys);
			return root;
		}

		/// <summary>
		/// Decodes six-hex-digit CSS escapes in a single left-to-right pass.
		/// Doing it in one pass means only text that really follows a
		/// backslash is treated as an escape, and a decoded backslash
		/// (\00005C) survives instead of being stripped afterwards.
		/// </summary>
		private static string DecodeCssEscapes(string value)
		{
			return CssEscapePattern.Replace(value, match =>
			{
				// A backslash without six hex digits after it is simply dropped.
				if (!match.Groups[1].Success)
				{
					return String.Empty;
				}

				// Cannot fail: the pattern guarantees exactly six hex digits.
				int codePoint = int.Parse(
					match.Groups[1].Value,
					System.Globalization.NumberStyles.HexNumber,
					System.Globalization.CultureInfo.InvariantCulture);

				if (codePoint <= 0xFFFF)
				{
					return ((char)codePoint).ToString();
				}

				if (codePoint <= 0x10FFFF)
				{
					// Outside the BMP: needs a surrogate pair, a single char cast would truncate.
					return char.ConvertFromUtf32(codePoint);
				}

				// Not a Unicode code point at all; substitute the replacement character.
				return "\uFFFD";
			});
		}

		/// <summary>
		/// Recursively delete nodes not in the whitelist. Comment nodes are
		/// deleted as well: they are not whitelisted content and can carry
		/// markup (e.g. conditional comments) past the element filter.
		/// A deleted node takes its whole subtree with it.
		/// </summary>
		private static void CleanNodes(HtmlNode node, ICollection<string> whitelist)
		{
			bool isForbiddenElement =
				node.NodeType == HtmlNodeType.Element && !whitelist.Contains(node.Name);
			bool isComment = node.NodeType == HtmlNodeType.Comment;

			if (isForbiddenElement || isComment)
			{
				// A node without a parent has nothing to be detached from.
				if (node.ParentNode != null)
				{
					node.ParentNode.RemoveChild(node);
				}

				return; // We're done
			}

			if (node.HasChildNodes)
			{
				CleanChildren(node, whitelist);
			}
		}

		/// <summary>
		/// Apply CleanNodes to each of the child nodes
		/// </summary>
		private static void CleanChildren(HtmlNode parent, ICollection<string> whitelist)
		{
			// Walk backwards so removing a child does not shift the
			// indexes of the children still to be visited.
			for (int i = parent.ChildNodes.Count - 1; i >= 0; i--)
			{
				CleanNodes(parent.ChildNodes[i], whitelist);
			}
		}

		/// <summary>
		/// Helper function that returns an HTML document from text.
		/// A null source is treated as an empty string, so a document is
		/// always returned.
		/// </summary>
		private static HtmlDocument GetHtml(string source)
		{
			HtmlDocument html = new HtmlDocument();
			html.OptionFixNestedTags = true;
			html.OptionAutoCloseOnEnd = true;
			html.OptionDefaultStreamEncoding = Encoding.UTF8;

			html.LoadHtml(source ?? "");

			// Encode any code blocks independently so they won't
			// be stripped out completely when we do a final cleanup.
			// The matching nodes are collected up front because assigning
			// InnerHtml rebuilds the subtree that is being walked.
			List<HtmlNode> codeNodes = html.DocumentNode
				.DescendantsAndSelf()
				.Where(n => n.Name == "code")
				.ToList();

			foreach (HtmlNode codeNode in codeNodes)
			{
				//** Code tag attribute vulnerability fix 28-9-12 (thanks to Natd)
				foreach (HtmlAttribute attribute in codeNode.Attributes.ToArray())
				{
					if (attribute.Name != "style" && attribute.Name != "class")
					{
						attribute.Remove();
					}
				} //** End fix

				codeNode.InnerHtml =
					Microsoft.Security.Application.Encoder.HtmlEncode(codeNode.InnerHtml);
			}

			return html;
		}
	}
}